###### Appendix D: All Tables
###### Balance Tables and Summary Statistics
# Start from a clean workspace and fix the RNG seed so results are reproducible.
gc(); rm(list = ls()); set.seed(12345)

# Point the working directory at the folder containing this script. The lookup
# relies on the RStudio API, so it is only attempted when RStudio is running;
# otherwise the working directory is left untouched and must be set to the
# source file location manually.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
} else {
  message("RStudio not detected; working directory left at '", getwd(),
          "'. Set it to the source file location manually if needed.")
}


# Packages used by this script. library() stops immediately when a package is
# missing, whereas require() only returns FALSE and lets the script fail later.
packages <- c("stargazer")

invisible(lapply(packages, library, character.only = TRUE))
source("functions.R") # Project helpers (make_equation() is used below)

# Cleaned experiment data: June 2017 (VAT experiment) and June 2018 (field
# experiment). Strings are kept as character vectors, not factors.
vatMerge <- read.csv("data/cleaned/uganda_jun17_cleaned.csv",
                     stringsAsFactors = FALSE)
ugField <- read.csv("data/cleaned/uganda_jun18_cleaned.csv",
                    stringsAsFactors = FALSE)


#######################################################################################
#######################################################################################
############################# BALANCE TESTS AND SUMMARY STATS #########################
#######################################################################################
#######################################################################################

#######################################################################################
############################# VAT EXPERIMENT  ###########################################
#######################################################################################

## Dataset
whichRound <- 2
dfVar <- read.csv("data/other/labexp_balvar_info.csv", stringsAsFactors = FALSE)
# Keep only variables flagged include == 1 (per the original note, the variables
# common across all datasets). Comment this line out to keep every listed
# variable. which() drops rows whose flag is NA instead of turning them into
# all-NA rows, which plain logical subsetting would do.
dfVar <- dfVar[which(dfVar$include == 1), ]
treatInc <- c("Direct Tax", "Windfall",
              "Weak Hidden VAT", "Strong Hidden VAT")
# Subset to subject-level rows (role "C") of the chosen round and keep only the
# treatment conditions analyzed. which() again guards against NA in round/role
# producing all-NA rows.
dfUsing <- vatMerge[which(vatMerge$round == whichRound &
                            vatMerge$treatment_pooled %in% treatInc &
                            vatMerge$role == "C"), ]
# Preserve the raw vote and recode it to a 0/1 indicator (1 stays 1, any other
# non-missing value becomes 0, NA stays NA).
dfUsing$vote_orig <- dfUsing$vote
dfUsing$vote <- ifelse(dfUsing$vote_orig == 1, 1, 0)
# Relabel the two VAT arms with the names used in the tables.
dfUsing$treatment_pooled[dfUsing$treatment_pooled == "Weak Hidden VAT"] <- "Visible VAT"
dfUsing$treatment_pooled[dfUsing$treatment_pooled == "Strong Hidden VAT"] <- "Hidden VAT"
# Sanity check of the resulting treatment labels (auto-prints when run interactively).
levels(factor(dfUsing$treatment_pooled))

## Variables

# Column holding the treatment assignment, and the column of dfVar that lists
# this dataset's variable names (it must exist in dfVar).
treatVar <- "treatment_pooled"
dataset <- "ug_jun17"

## Summary Table

# Columns of dfUsing that are listed for this dataset in dfVar, in dfUsing order.
whichCols <- names(dfUsing)[names(dfUsing) %in% dfVar[[dataset]]]
# Data for the summary-statistics table, with columns renamed to display labels.
dfBal <- dfUsing[dfUsing$round == whichRound, whichCols]
colnames(dfBal) <- dfVar$label[match(whichCols, dfVar[[dataset]])]

## Simple means by treatment condition

# Variables flagged bal == 1 enter the balance table; balNames are their labels.
balUse <- whichCols[whichCols %in% dfVar[[dataset]][dfVar$bal == 1]]
balNames <- dfVar$label[match(balUse, dfVar[[dataset]])]
# One row per balance variable, one column per treatment condition.
balTab <- round(
  t(sapply(balUse, FUN = function(v) {
    tapply(dfUsing[[v]], dfUsing[[treatVar]], mean, na.rm = TRUE)
  })),
  digits = 2
)
# balTab has to be computed before the balance test: that step flips the sign
# of some variables in dfUsing, and the means should be on the original scale.

## Balance Test

# Flip the sign of these variables so that "more = better". This overwrites the
# columns in dfUsing and is NOT idempotent: re-running this block without
# rebuilding dfUsing flips the signs back.
# NOTE(review): negating a variable leaves the overall F-test of a linear model
# unchanged, so the p-values below should not depend on this flip -- confirm
# whether later code relies on the flipped signs.
switchVars <- c("pov_food", "pov_water")
dfUsing[, switchVars] <- -1 * dfUsing[, switchVars]

# For each balance variable, fit the model built by make_equation(x, treatVar)
# and return the p-value of its overall F-test. vapply() guarantees a plain
# numeric vector (one entry per variable); a model without an F statistic
# yields NA instead of silently turning the result into a list.
balTest <- vapply(balUse, FUN = function(x) {
  modSum <- summary(lm(make_equation(x, treatVar), data = dfUsing))
  fStat <- modSum$fstatistic # c(F value, numerator df, denominator df)
  if (is.null(fStat)) {
    return(NA_real_)
  }
  # Upper tail of the F distribution = p-value of the F-test
  unname(pf(fStat[1], fStat[2], fStat[3], lower.tail = FALSE))
}, FUN.VALUE = numeric(1))

# Final table: group means, raw F-test p-values and Benjamini-Hochberg adjusted
# p-values (FDR q), all rounded to two digits.
balFinal <- as.data.frame(round(cbind(balTab, balTest, p.adjust(balTest, method = "BH")), digits = 2))
colnames(balFinal) <- c(colnames(balTab), "F-test p", "FDR q")
rownames(balFinal) <- balNames

### TABLE D3: Summary Statistics
# Mean, SD, min, max and N for every column of dfBal.
stargazer(
  dfBal,
  summary = TRUE,
  digits = 2,
  summary.stat = c("mean", "sd", "min", "max", "n")
)

### TABLE D4: Balance Tests
# Print balFinal as-is (no summary statistics computed).
stargazer(
  balFinal,
  summary = FALSE,
  digits = 2
)

#######################################################################################
############################# UGANDA FIELD EXPERIMENT #################################
#######################################################################################

# Make sure stargazer is attached; library() errors right away if it is missing
# (require() would only return FALSE).
library(stargazer)

## Dataset

dfVar <- read.csv("data/other/labexp_balvar_info.csv", stringsAsFactors = FALSE)
# Keep variables flagged both include == 1 and bal == 1. which() drops rows
# where either flag is NA instead of turning them into all-NA rows.
dfVar <- dfVar[which(dfVar$include == 1 & dfVar$bal == 1), ]
# Keep only observations with a non-missing treatment assignment.
dfUsing <- ugField[!is.na(ugField$rand_vm), ]

## Variables

treatVar <- "rand_vm"
dataset <- "ug_jun18" # Note that this has to be a column name in the dfVar frame

## Summary Table

# Columns of dfUsing that are listed for this dataset in dfVar, in dfUsing order.
whichCols <- names(dfUsing)[names(dfUsing) %in% dfVar[[dataset]]]
# Data for the descriptive-statistics table, with columns renamed to display labels.
dfBal <- dfUsing[, whichCols]
colnames(dfBal) <- dfVar$label[match(whichCols, dfVar[[dataset]])]

## Simple means by treatment condition

# Variables flagged bal == 1 enter the balance table; balNames are their labels.
balUse <- whichCols[whichCols %in% dfVar[[dataset]][dfVar$bal == 1]]
balNames <- dfVar$label[match(balUse, dfVar[[dataset]])]
# One row per balance variable, one column per treatment condition.
balTab <- round(
  t(sapply(balUse, FUN = function(v) {
    tapply(dfUsing[[v]], dfUsing[[treatVar]], mean, na.rm = TRUE)
  })),
  digits = 2
)
# balTab has to be computed before the balance test: that step flips the sign
# of some variables in dfUsing, and the means should be on the original scale.

## Balance Test

# Flip the sign of these variables so that "more = better". This overwrites the
# columns in dfUsing and is NOT idempotent: re-running this block without
# rebuilding dfUsing flips the signs back.
# NOTE(review): negating a variable leaves the overall F-test of a linear model
# unchanged, so the p-values below should not depend on this flip -- confirm
# whether later code relies on the flipped signs.
switchVars <- c("ptc_pov_food", "ptc_pov_water")
dfUsing[, switchVars] <- -1 * dfUsing[, switchVars]

# For each balance variable, fit the model built by make_equation(x, treatVar)
# and return the p-value of its overall F-test. vapply() guarantees a plain
# numeric vector (one entry per variable); a model without an F statistic
# yields NA instead of silently turning the result into a list.
balTest <- vapply(balUse, FUN = function(x) {
  modSum <- summary(lm(make_equation(x, treatVar), data = dfUsing))
  fStat <- modSum$fstatistic # c(F value, numerator df, denominator df)
  if (is.null(fStat)) {
    return(NA_real_)
  }
  # Upper tail of the F distribution = p-value of the F-test
  unname(pf(fStat[1], fStat[2], fStat[3], lower.tail = FALSE))
}, FUN.VALUE = numeric(1))

# Final table: group means, raw F-test p-values and Benjamini-Hochberg adjusted
# p-values (FDR q), all rounded to two digits.
balFinal <- as.data.frame(round(cbind(balTab, balTest, p.adjust(balTest, method = "BH")), digits = 2))
colnames(balFinal) <- c(colnames(balTab),
                        "F-test p", "FDR q")
rownames(balFinal) <- balNames

## Sample allocation
# Number of observations per treatment condition, labelled with the same
# condition names as the columns of balTab.
sampTab <- table(dfUsing[[treatVar]])
rownames(sampTab) <- colnames(balTab)

## Table D5: Survey Experiment: Descriptive Statistics
# Mean, SD, min, max and N for every column of dfBal.
stargazer(
  dfBal,
  summary = TRUE,
  digits = 2,
  summary.stat = c("mean", "sd", "min", "max", "n")
)

## Table D6: Survey Experiment: Balance Tests
# Print balFinal as-is (no summary statistics computed), as LaTeX.
stargazer(
  balFinal,
  summary = FALSE,
  type = "latex",
  digits = 2
)
